EX 1
MCMC analysis
library(coda)
metropolis_hastings.1dim <- function(func, mu_0, n_samp, sig, burn_in, thin) {
mu <- mu_0
n_acc <- 0
chain <- c()
chain <- append(chain,mu)
for (i in 1:n_samp){
new_mu <- rnorm(n=1, mu, sig)
bound <- min(func(new_mu)/func(mu),1)
dice <- runif(1,0,1)
if(dice<bound){
mu <- new_mu
if(i>burn_in){
chain <- append(chain,mu)
}
n_acc <- n_acc +1
}
else{
if(i>burn_in){
chain <- append(chain,mu)
}
}
}
results <- chain[seq(1,length(chain),thin)]
return( results )
}
library(parallel)
library(foreach)
library(doParallel)
x <- seq(-10,10,0.01)
post <- function(x){
return(0.5*dnorm(x = x, mean = -3, sd = 1) + 0.5*dnorm(x = x, mean = 3, sd = 1))
}
# I decided to use parallelization otherwise it would take hours
totalCores = detectCores()
# Leave one core free
cluster <- makeCluster(totalCores[1]-1)
# Start the cluster
registerDoParallel(cluster)
burn <- c(100,200,1000)
th <- c(5,25,50)
# Run on the cluster
for (i in burn){
for (j in th){
g_chain <- foreach(i = 1:200, .combine= append) %dopar% {
metropolis_hastings.1dim(post, 0, 10000, 1, i, j)
}
g_mcmc <- mcmc(g_chain)
h <- hist(g_mcmc,breaks=300,plot = FALSE)
h$counts <- h$counts/length(h$counts)
g <- post(x)
# plot(summary(g_mcmc))
plot(h, main = paste("Burn-in and thinning = ", i,",",j) )
plot(g_mcmc)
plot(acf(g_mcmc, lag.max = 1000, plot = FALSE)$acf)
cat("---------------------------------------------------------------------------")
}
}
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
## ---------------------------------------------------------------------------
# Stop cluster
stopCluster(cluster)
g_mcmc <- mcmc(g_chain)
cat("The best choice in terms of burn-in cycles and thinning seems to be a thinning of 5, that does not produce the peak at 0 present in the other cases. For what concern the burn-in, it would be better to have a quite robust burn-in, but at the same time we would have to generate more samples. The best trade-off in this case is burn-in of 200 cycles, that generate a auto correlation function which rapidly goes to 0")
## The best choice in terms of burn-in cycles and thinning seems to be a thinning of 5, that does not produce the peak at 0 present in the other cases. For what concern the burn-in, it would be better to have a quite robust burn-in, but at the same time we would have to generate more samples. The best trade-off in this case is burn-in of 200 cycles, that generate a auto correlation function which rapidly goes to 0
EX 2
library(R2jags)
Efficacy <- function(r_vax, n_vax, r_plac, n_plac ){
eff <- (r_plac/n_plac - r_vax/n_vax)/(r_plac/n_plac)
model <- "model{
# Likelihood
y ~ dbinom(Prob_vax, n_vax)
x ~ dbinom(Prob_placebo, n_plac)
Efficacy <- (Prob_placebo-Prob_vax)/Prob_placebo
# Prior
Prob_vax ~ dunif(0, 1)
Prob_placebo ~ dunif(0, 1)
}"
datalist <- list( y= r_vax, n_vax= n_vax, x= r_plac, n_plac= n_plac )
jm_vax <- jags.model(file= textConnection(model), data= datalist , n.chains = 3)
update(jm_vax, n.iter = 1000)
Nrep = 1000000
posterior_sample_eff <- coda.samples(jm_vax,
variable.names = c("Efficacy","Prob_vax","Prob_placebo"),
n.iter = Nrep)
# getting the results
sum <- summary(posterior_sample_eff)
quantile_2.5_vax <- sum$quantiles[3, 1]
quantile_97.5_vax <- sum$quantiles[3, 5]
quantile_2.5_plac <- sum$quantiles[2, 1]
quantile_97.5_plac <- sum$quantiles[2, 5]
quantile_2.5_eff <- sum$quantiles[1, 1]
quantile_97.5_eff <- sum$quantiles[1, 5]
mcmc_list <- as.mcmc.list(posterior_sample_eff)
#getting all the chains
sam_vax <- c(c(mcmc_list[[1]][,3]),c(mcmc_list[[2]][,3]),c(mcmc_list[[3]][,3]))
sam_pla <- c(c(mcmc_list[[1]][,2]),c(mcmc_list[[2]][,2]),c(mcmc_list[[3]][,2]))
eff <- c(c(mcmc_list[[1]][,1]),c(mcmc_list[[2]][,1]),c(mcmc_list[[3]][,1]))
# Plots
# Prob vax
hist(sam_vax, breaks = 100, main= "Probability of vaccinated people", xlab = "Prob", col="lightblue" )
abline(v=c(quantile_2.5_vax,quantile_97.5_vax), lty=c(2,2), col=c("red","red"))
text(x=quantile_97.5_vax*1.05, y=100000, paste("Upp 95% bound=",round(quantile_97.5_vax,4)), col="darkred" ,cex=1)
text(x=quantile_2.5_vax*0.95, y=100000, paste("Low 95% bound=",round(quantile_2.5_vax,4)), col="darkred" ,cex=1)
# Prob plac
hist(sam_pla, breaks = 100, main= "Probability of placebo people", xlab = "Prob",col="lightblue")
abline(v=c(quantile_2.5_plac,quantile_97.5_plac), lty=c(2,2), col=c("red","red"))
text(x=quantile_97.5_plac*1.05, y=100000, paste("Upp 95% bound=",round(quantile_97.5_plac,4)), col="darkred" ,cex=1)
text(x=quantile_2.5_plac*0.95, y=100000, paste("Low 95% bound=",round(quantile_2.5_plac,4)), col="darkred" ,cex=1)
# Efficacy
hist(eff, breaks = 100, main= "Efficacy", xlab = "Efficacy" ,col="lightblue")
abline(v=c(quantile_2.5_eff,quantile_97.5_eff), lty=c(2,2), col=c("red","red"))
text(x=quantile_97.5_eff, y=100000, paste("Upp 95% bound=",round(quantile_97.5_eff,4)), col="darkred" ,cex=1)
text(x=quantile_2.5_eff*0.95, y=100000, paste("Low 95% bound=",round(quantile_2.5_eff,4)), col="darkred" ,cex=1)
return(posterior_sample_eff)
}
EX 2.1
JCOVDEN
This study involved over 44,000 people. Half received a single dose of the vaccine and half were given placebo (a dummy injection). People did not know if they had been given Jcovden or placebo. The trial found a 67% reduction in the number of symptomatic COVID-19 cases after 2 weeks in people who received Jcovden (116 cases out of 19,630 people) compared with people given placebo (348 of 19,691 people). This means that the vaccine had a 67% efficacy.
library(coda)
library(rjags)
library(tidybayes)
r_vax <- 116
n_vax <- 19630
r_plac <- 348
n_plac <- 19691
cat("The Efficacy of the Jansenn vaccine is", 100*round((r_plac/n_plac-r_vax/n_vax)/(r_plac/n_plac),3), "%, as declared by the company. \n" )
## The Efficacy of the Jansenn vaccine is 66.6 %, as declared by the company.
eff_Jansenn <- Efficacy(r_vax, n_vax, r_plac, n_plac )
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 2
## Unobserved stochastic nodes: 2
## Total graph size: 10
##
## Initializing model
MODERNA
The trial showed a 94.1% reduction in the number of symptomatic COVID-19 cases in the people who received the vaccine (11 out of 14,134 vaccinated people got COVID-19 with symptoms) compared with people who received dummy injections (185 out of 14,073 people who received dummy injections got COVID-19 with symptoms). This means that the vaccine demonstrated a 94.1% efficacy in the trial.
r_vax = 11
n_vax = 14134
r_plac = 185
n_plac = 14073
cat("The Efficacy of the Moderna vaccine is ", 100*round((r_plac/n_plac-r_vax/n_vax)/(r_plac/n_plac),3), "%, as declared by the company. \n" )
## The Efficacy of the Moderna vaccine is 94.1 %, as declared by the company.
eff_Moderna <- Efficacy(r_vax, n_vax, r_plac, n_plac )
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 2
## Unobserved stochastic nodes: 2
## Total graph size: 10
##
## Initializing model
Ex 3
Vaccinations report
library(tidyverse)
library(lubridate)
vacc <- read.csv("vaccinations.csv")
tot_vac <- subset(vacc, location == c("World","Europe","Asia","Africa","North America","South America", "Oceania"))[c("location","date","total_vaccinations")]
tot_vac$date <- as.Date( tot_vac$date)
day_vac <- subset(vacc, location == c("World","Europe","Asia","Africa","North America","South America", "Oceania"))[c("location","date","daily_people_vaccinated")]
day_vac$date <- as.Date( day_vac$date)
# Compute the week average
w <- subset(vacc, location == c("World"))[c("date","daily_people_vaccinated")]
w$date <- as.Date( w$date)
w$week_num <- strftime(w$date, format = "%V")
w$year <- strftime(w$date, format = "%G")
week_avg <- w %>% group_by(week_num,year) %>% summarise(week_average=mean(daily_people_vaccinated), .groups='drop')
week_avg <- week_avg %>% mutate(x = make_date(year)) %>% mutate(date = x + lubridate::weeks(week_num))
week_avg <- week_avg[order(week_avg$year,week_avg$week_num),]
# Plot the results
day_doses <- subset(vacc, location == c("World","Europe","Asia","Africa","North America","South America", "Oceania"))[c("location","date","daily_vaccinations")]
day_doses$date <- as.Date( day_doses$date)
ggplot(data=tot_vac, mapping = aes(x = date, y = total_vaccinations)) + geom_line(aes(color=location)) + ggtitle("Cumulative distribution of vaccinated people for Covid-19") + ylab("Number of people")
ggplot(data=week_avg, aes(x=date, y=week_average)) + geom_line() + ggtitle("Distribution of vaccinated people for Covid-19 in the World, weekly averaged") + ylab("Number of people")
ggplot(data=day_vac, mapping = aes(x = date, y = daily_people_vaccinated)) + geom_line(aes(color=location)) + ggtitle("Distribution of vaccinated people for Covid-19, daily") + ylab("Number of people")
ggplot(data=day_doses, mapping = aes(x = date, y = daily_vaccinations)) + geom_line(aes(color=location)) + ggtitle("Distribution of administered doses for Covid-19") + ylab("Number of doses")
Deaths report
death <- read_csv("total_deaths.csv")
death$date <- as.Date(death$date)
week_death <- read_csv("weekly_deaths.csv")
week_death$date <- as.Date(week_death$date)
ggplot(data= death, aes(date)) + geom_line(aes(y= World, color = "World")) +
geom_line(aes(y= Europe, color = "Europe")) +
geom_line(aes(y= Asia, color = "Asia")) +
geom_line(aes(y= Africa, color = "Africa")) +
geom_line(aes(y= Oceania, color = "Oceania")) +
geom_line(aes(y= North_America, color = "North America")) +
geom_line(aes(y= South_America, color = "South America")) + ggtitle("Cumulative distribution of death by Covid-19") + ylab("Number of people")
ggplot(data= week_death, aes(date)) + geom_line(aes(y= World, color = "World")) +
geom_line(aes(y= Europe, color = "Europe")) +
geom_line(aes(y= Asia, color = "Asia")) +
geom_line(aes(y= Africa, color = "Africa")) +
geom_line(aes(y= Oceania, color = "Oceania")) +
geom_line(aes(y= North_America, color = "North America")) +
geom_line(aes(y= South_America, color = "South America")) + ggtitle(" Distribution of death by Covid-19, weekly averaged") + ylab("Number of people")